# Project root directory. Relative paths used later in the script (for
# example the output file) are resolved against it via setwd() below.
main <- "\\\\rschfs1x\\userRS\\a-e\\clieberman_RS\\Documents\\rd graph\\replication_final_public_test5"

# Data folder inside the project root. Derived from `main` rather than
# spelled out a second time, so the two locations cannot drift apart.
dat <- paste0(main, "\\data")

setwd(main)

##Load required libraries:
library(haven)
library(zipfR)
library(Hmisc)
library(tidyverse)

## Initialization
# Read the micro-level data and count, for every
# phase x playerdgp x playertreatment_group x |disc| cell, how many rows fall
# into each of the four (y, confident) combinations:
#   n1: y == 0 and confident        n2: y == 0 and not confident
#   n3: y == 1 and not confident    n4: y == 1 and confident
# `confident` is TRUE when playerchosenStakes == 1.
# NOTE(review): presumably y == 1 marks a correct response - confirm against
# the survey codebook.
responses <- read_dta(paste0(dat, "/survey_merged_rd_estimates_micro.dta")) %>%
  mutate(
    confident = playerchosenStakes == 1,
    n1 = y == 0 & confident,
    n2 = y == 0 & !confident,
    n3 = y == 1 & !confident,
    n4 = y == 1 & confident
  ) %>%
  # Name the computed grouping column directly instead of carrying around a
  # column literally called `abs(disc)`.
  group_by(phase, playerdgp, playertreatment_group, abs_disc = abs(disc)) %>%
  summarize(
    n1 = sum(n1),
    n2 = sum(n2),
    n3 = sum(n3),
    n4 = sum(n4),
    .groups = "drop"
  ) %>%
  mutate(
    # Total number of rows in the cell.
    n = n1 + n2 + n3 + n4,
    # Translate the raw discontinuity codes into magnitudes; any code that is
    # not listed is flagged with the sentinel value -1.
    # NOTE(review): the raw codes look like the magnitudes with the decimal
    # point removed - confirm against the data documentation.
    abs_disc = case_when(
      abs_disc == 0 ~ 0,
      abs_disc == 1944 ~ 0.1944,
      abs_disc == 324 ~ 0.324,
      abs_disc == 54 ~ 0.54,
      abs_disc == 9 ~ 0.9,
      abs_disc == 15 ~ 1.5,
      TRUE ~ -1
    )
  ) %>%
  # Keep the column layout: identifiers, n1-n4, n, abs_disc.
  relocate(abs_disc, .after = n)

#' Average distance-from-0.5 score based on band midpoints
#'
#' The unit interval is split into four bands, lowest first:
#' `[0, b]`, `[b, 0.5]`, `[0.5, 1 - b]` and `[1 - b, 1]`. Every observation is
#' assigned the midpoint of its band, the absolute distance of that midpoint
#' from 0.5 is averaged over all observations, and the average is doubled.
#'
#' All count arguments are used element-wise, so vectors of equal length (for
#' example data-frame columns) give one result per element.
#'
#' @param n1 Number of observations in the first (lowest) band.
#' @param n2 Number of observations in the second band.
#' @param n3 Number of observations in the third band.
#' @param n4 Number of observations in the fourth (highest) band.
#' @param b Width of the two outer bands.
#' @return Numeric vector: twice the observation-weighted mean of
#'   `abs(0.5 - midpoint)`. The result is `NaN` where all four counts are 0.
conf_midpoints <- function(n1, n2, n3, n4, b) {
  denom <- n1 + n2 + n3 + n4
  n1_sum <- n1 * abs(0.5 - (b / 2))
  n2_sum <- n2 * abs(0.5 - (0.5 + b) / 2)
  # The upper edge of the third band is 1 - b, mirroring the second band.
  # Writing it as 2 * b is only right for b = 1/3.
  n3_sum <- n3 * abs(0.5 - (0.5 + 1 - b) / 2)
  n4_sum <- n4 * abs(0.5 - (1 + 1 - b) / 2)
  2 * (n1_sum + n2_sum + n3_sum + n4_sum) / denom
}

# Score every cell with conf_midpoints() (outer band width 1/3), then take
# the simple, unweighted mean of the cell scores within each
# phase x treatment group x discontinuity magnitude combination.
responses <- responses %>%
  mutate(
    # Replace the numeric phase code with its display name; codes other than
    # 1, 2, 3 and 6 become the empty string.
    phase = case_when(
      phase == 1 ~ "Phase RDD1",
      phase == 2 ~ "Phase RDD2",
      phase == 3 ~ "Phase RDD3",
      phase == 6 ~ "Phase RDD4",
      TRUE ~ ""
    ),
    confidence_midpoints = conf_midpoints(n1, n2, n3, n4, 1 / 3)
  ) %>%
  group_by(phase, playertreatment_group, abs_disc) %>%
  summarize(
    confidence_midpoints = mean(confidence_midpoints),
    .groups = "drop"
  )

# Variable labels to attach to the output columns, keyed by column name.
variable_labels <- c(
  phase = "Experiment phase",
  playertreatment_group = "Phase-specific graph treatment",
  abs_disc = "Discontinuity magnitude",
  confidence_midpoints = "Participant's imputed subjective probability of correct response"
)
for (column in names(variable_labels)) {
  label(responses[[column]]) <- variable_labels[[column]]
}

# Save the labelled summary as a Stata file; the relative path is resolved
# against the current working directory.
write_dta(responses, "data/confidence.dta")
